{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Highlights of the Code\n",
    "\n",
    "1. **Replication of Table E**  \n",
    "   This code replicates Table E from the Internet Appendix of the paper.\n",
    "\n",
    "<br>\n",
    "\n",
    "2. **Forecasting Risk Premiums with Lasso**  \n",
    "   The code demonstrates how to forecast risk premiums using a Lasso Model \n",
    "   and generates the corresponding forecast confidence intervals.\n",
    "\n",
    "<br>\n",
    "\n",
    "3. **175 Stock-Level Predictors as Inputs**  \n",
    "   It utilizes 175 stock-level predictors to produce Lasso-based risk premium forecasts  \n",
    "   along with their forecast standard errors.\n",
    "\n",
    "<br>\n",
    "\n",
    "4. **Model Retraining Schedule**  \n",
    "   - Models are retrained at the start of each year.  \n",
    "   - With out-of-sample data spanning 1987 to 2020, the models are retrained 34 times.  \n",
    "   - **Initial training sample**: 1957 to 1974.  \n",
    "   - The training sample expands recursively each year.  \n",
    "   - **Initial validation sample**: 1975 to 1986.  \n",
    "   - The validation sample advances forward annually.\n",
    "\n",
    "\n",
    "<br>\n",
    "\n",
    "5. **Copyright Protection**  \n",
    "   The code is intended solely for research and non-commercial purposes.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import numpy as np # linear algebra\n",
    "import multiprocessing as mp\n",
    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
    "import seaborn as sns\n",
    "import numpy\n",
    "import pandas\n",
    "from joblib import Parallel, delayed\n",
    "import keras  \n",
    "from keras import regularizers\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense\n",
    "from keras.wrappers.scikit_learn import KerasRegressor\n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.model_selection import KFold\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.pipeline import Pipeline\n",
    "from keras.layers import BatchNormalization\n",
    "from keras.layers import Activation\n",
    "from keras import optimizers\n",
    "from sklearn.metrics import r2_score\n",
    "from keras import callbacks\n",
    "from keras.callbacks import EarlyStopping, ModelCheckpoint\n",
    "from keras.layers import Dropout\n",
    "from keras.models import load_model\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from sklearn.linear_model import ElasticNet\n",
    "from numpy.linalg import inv\n",
    "from scipy import stats\n",
    "import scipy\n",
    "from scipy import stats\n",
    "from scipy.optimize import minimize\n",
    "from scipy.stats import invgauss\n",
    "from sklearn import linear_model\n",
    "#from scipy.stats import invgauss\n",
    "import scipy.stats as sc\n",
    "from numpy.linalg import matrix_rank\n",
    "from sklearn.linear_model import LinearRegression\n",
    "#import gc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np # linear algebra\n",
    "import multiprocessing as mp\n",
    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
    "import seaborn as sns\n",
    "import numpy\n",
    "import pandas\n",
    "from joblib import Parallel, delayed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import keras  \n",
    "from keras import regularizers\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense\n",
    "from keras.wrappers.scikit_learn import KerasRegressor\n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.model_selection import KFold\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.pipeline import Pipeline\n",
    "from keras.layers import BatchNormalization\n",
    "from keras.layers import Activation\n",
    "from keras import optimizers\n",
    "from sklearn.metrics import r2_score\n",
    "from keras import callbacks\n",
    "from keras.callbacks import EarlyStopping, ModelCheckpoint\n",
    "from keras.layers import Dropout\n",
    "from keras.models import load_model\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from sklearn.decomposition import PCA\n",
    "\n",
    "import warnings \n",
    "  \n",
    "# Settings the warnings to be ignored \n",
    "warnings.filterwarnings('ignore') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def Gibbs_sampler_lambda_wls(X, Y_tilde, xerror, errorbetain,h1, h2, n, r, delta):\n",
    "    \n",
    "    beta=np.zeros((n,X.shape[1]))\n",
    "    errorbeta=np.zeros((n,xerror.shape[1]))\n",
    "    tau_sq=np.zeros((n,X.shape[1]))\n",
    "    sigma_sq=np.zeros((n,))\n",
    "    lambda_sq=np.zeros((n,))\n",
    "    # Initialization\n",
    "    errorbeta[0]=errorbetain\n",
    "    beta[0] = np.random.uniform(size = X.shape[1])\n",
    "    sigma_sq[0] = np.random.uniform()\n",
    "    tau_sq[0] = np.random.uniform(size = X.shape[1])\n",
    "    lambda_sq[0] = np.random.uniform()\n",
    "    \n",
    "    #errortrain=ytrain-ytrainpred\n",
    "\n",
    "    #xyerror=xerror.transpose().dot(errortrain)\n",
    "    #errorvar=np.var(errortrain-errorfit)\n",
    "    Aerror= xerror.transpose().dot(xerror)\n",
    "    Aerrorinv=np.linalg.inv(Aerror)\n",
    "    #errorcoef=Aerrorinv.dot(xyerror)\n",
    "    for i in range(n-1):\n",
    "        # Full conditional for beta\n",
    "        Y_error=Y_tilde-xerror.dot(errorbeta[i])\n",
    "        D_tau = np.diag(tau_sq[i])\n",
    "        D_tau_inv=np.linalg.inv(D_tau)\n",
    "        A = X.transpose().dot(X) + D_tau_inv\n",
    "        Ainv=np.linalg.inv(A)\n",
    "        multi_norm_mean = Ainv.dot(X.transpose()).dot(Y_error)\n",
    "        multi_norm_cov = sigma_sq[i] * Ainv\n",
    "        beta[i+1]=(np.random.multivariate_normal(multi_norm_mean, multi_norm_cov))\n",
    "        # Full conditional for sigma_sq\n",
    "        shape = (X.shape[0]-1+X.shape[1])/2\n",
    "        scale = ((Y_error - X.dot(beta[i+1])).dot((Y_error - X.dot(beta[i+1]))) + beta[i+1].transpose().dot(D_tau_inv).dot(beta[i+1]))/2\n",
    "        sigma_sq[i+1]=(sc.invgamma.rvs(a = shape, scale = scale))\n",
    "        # Full conditional for tau_1,...,tau_p\n",
    "        mean = np.sqrt(lambda_sq[i]*sigma_sq[i+1]/beta[i+1]**2)\n",
    "        scale = np.repeat(lambda_sq[i], X.shape[1])\n",
    "        tau_sq[i+1]=(1/np.random.wald(mean, scale))\n",
    "        # Full conditional for lambda_sq\n",
    "        shape = X.shape[1] + r\n",
    "        rate = sum(tau_sq[i+1])/2+delta\n",
    "        lambda_sq[i+1]=(np.random.gamma(shape, 1/rate))\n",
    "        \n",
    "        errortrain=Y_tilde-X.dot(beta[i+1])\n",
    "        xyerror=xerror.transpose().dot(errortrain)\n",
    "        errorbeta_mean=Aerrorinv.dot(xyerror)\n",
    "        errorvar=np.var(Y_tilde-X.dot(beta[i+1])-xerror.dot(errorbeta[i]))\n",
    "        errorbeta[i+1]=np.random.multivariate_normal(h1*errorbeta_mean, h2*Aerrorinv*errorvar)\n",
    "    return beta[int(n/2):]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rsq_oos(y,ypred): \n",
    "    SSR = np.sum((y-ypred)**2)\n",
    "    MSS = np.sum((y)**2)\n",
    "    r_squared =1-SSR/MSS \n",
    "    return r_squared"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import linear_model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def identify_near_constant_predictors(df, threshold=0.001):\n",
    "    # Calculate the ratio of unique values to the total number of values\n",
    "    unique_ratio = df.apply(lambda x: len(x.unique()) / len(x))\n",
    "    \n",
    "    # Identify columns where the ratio of unique values is below the threshold\n",
    "    near_constant_columns = unique_ratio[unique_ratio < threshold].index.tolist()\n",
    "    \n",
    "    return near_constant_columns\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def signals_and_confidence_levels():\n",
    "    \n",
    "    for M in range(1,31):\n",
    "        np.random.seed(0)\n",
    "        gg1=pd.read_csv('twdata{}.csv'.format(M))\n",
    "        gg2=pd.read_csv('vdata{}.csv'.format(M))\n",
    "        gg3=pd.read_csv('tedata{}.csv'.format(M))\n",
    "\n",
    "        gg1['retmkt']=gg1['retmkt']-gg1['retmkt'].mean()\n",
    "        gg1['mktsize']=gg1['retmkt']*gg1['mvel1']\n",
    "        gg1['mktbm']=gg1['retmkt']*gg1['bm']\n",
    "        gg1['mktmom']=gg1['retmkt']*gg1['mom12m']\n",
    "        \n",
    "        pp=(gg1.columns.values)\n",
    "        gg1[\"yyyymm\"]= gg1[\"yyyymm\"].astype(str) \n",
    "        gg2[\"yyyymm\"]= gg2[\"yyyymm\"].astype(str) \n",
    "        gg3[\"yyyymm\"]= gg3[\"yyyymm\"].astype(str) \n",
    "        for k in range(97,105):\n",
    "            gg1[pp[k]]=(gg1[pp[k]]-np.mean(gg1[pp[k]]))/np.std(gg1[pp[k]])\n",
    "            gg2[pp[k]]=(gg2[pp[k]]-np.mean(gg1[pp[k]]))/np.std(gg1[pp[k]])\n",
    "            gg3[pp[k]]=(gg3[pp[k]]-np.mean(gg1[pp[k]]))/np.std(gg1[pp[k]]) \n",
    "        \n",
    "        \n",
    "        gg1=gg1.drop(columns=['count'])\n",
    "        gg2=gg2.drop(columns=['count'])\n",
    "        gg3=gg3.drop(columns=['count'])\n",
    "        gg1.columns[(gg1 == 0).all()]\n",
    "\n",
    "     \n",
    "        \n",
    "        drop_col_names=identify_near_constant_predictors(gg1.iloc[:,3:97])\n",
    "        gg1=gg1.drop(columns=drop_col_names)\n",
    "        gg2=gg2.drop(columns=drop_col_names)\n",
    "        gg3=gg3.drop(columns=drop_col_names)\n",
    "        \n",
    "        ##Dropping predictors that are nearly contant \n",
    "        nunique = gg1.nunique()\n",
    "        cols_to_drop = nunique[nunique == 1].index\n",
    "        gg1=gg1.drop(cols_to_drop, axis=1)\n",
    "        gg2=gg2.drop(cols_to_drop, axis=1)\n",
    "        gg3=gg3.drop(cols_to_drop, axis=1)\n",
    "        \n",
    "        \n",
    "        \n",
    "        \n",
    "        pp=(gg2.columns.values)\n",
    "        num_cols=pp[3:].size+3\n",
    "\n",
    "        \n",
    "        xtrain=((gg1.iloc[:,3:num_cols]))\n",
    "        xtest=((gg2.iloc[:,3:num_cols]))\n",
    "        xoos=(gg3.iloc[:,3:num_cols])\n",
    "        xerror=gg1[['retmkt','mktsize','mktbm','mktmom']]\n",
    "        ytrain=(gg1.iloc[:,2])\n",
    "\n",
    "        ytest=gg2.iloc[:,2]\n",
    "        yoos=gg3.iloc[:,2]\n",
    "\n",
    "        \n",
    "        ##Standardizing data\n",
    "        ytraind=(ytrain-np.mean(ytrain,axis=0))/np.std(ytrain)\n",
    "        ytestd=(ytest-np.mean(ytrain,axis=0))/np.std(ytrain)\n",
    "        xtraind=(xtrain-np.mean(xtrain,axis=0))/np.std(xtrain)\n",
    "        xtestd=(xtest-np.mean(xtrain,axis=0))/np.std(xtrain)\n",
    "        xoosd=(xoos-np.mean(xtrain,axis=0))/np.std(xtrain)\n",
    "    \n",
    "        ##Fitting a frequentist Lasso just for comparison sake.\n",
    "        lasso=linear_model.Lasso(alpha=0.0001, fit_intercept=False)\n",
    "        lasso.fit(xtraind, ytraind) \n",
    "        lasso_cv_coef = lasso.coef_\n",
    "        ytrainpred=lasso.predict(xtraind)*np.std(ytrain)+np.mean(ytrain)\n",
    "        ytestpred=lasso.predict(xtestd)*np.std(ytrain)+np.mean(ytrain)\n",
    "        yoospred=xoosd.dot(lasso.coef_)*np.std(ytrain)+np.mean(ytrain)\n",
    "        errortrain=ytrain-ytrainpred\n",
    "\n",
    "        reg = LinearRegression().fit(xerror, errortrain)\n",
    "        errorfit=reg.predict(xerror)\n",
    "        errortrain=ytrain-ytrainpred\n",
    "\n",
    "        xyerror=xerror.transpose().dot(errortrain)\n",
    "        errorvar=np.var(errortrain-errorfit)\n",
    "        Aerror= xerror.transpose().dot(xerror)\n",
    "        Aerrorinv=np.linalg.inv(Aerror)\n",
    "\n",
    "\n",
    "        errorbetain=0*Aerrorinv.dot(xyerror) ##Initiating factor loading coeffiicents (a_0,a_1,a_2,a_3) in the paper        \n",
    "        beta_gibbs=Gibbs_sampler_lambda_wls(xtraind, ytraind, xerror, errorbetain,1,1, 100, 0.15,0.2)\n",
    "        beta_bayes=np.median(beta_gibbs,axis=0)\n",
    "        yoosfreq=xoosd.dot(lasso_cv_coef)*np.std(ytrain)+np.mean(ytrain) ##frequentist lasso-based risk premium forecasts\n",
    "        yoosbayes=xoosd.dot(beta_bayes)*np.std(ytrain)+np.mean(ytrain) ##Bayesian Lass0-based risk premium forecasts\n",
    "        \n",
    "        yoosbayes[yoosbayes>1]=1\n",
    "        yoosbayes[yoosbayes<-1]=-1\n",
    "        \n",
    "        yoos_gibbs=np.zeros((50,yoos.shape[0]))\n",
    "\n",
    "        for i in range(50):\n",
    "\n",
    "            yoos_gibbs[i]=xoosd.dot(beta_gibbs[i])*np.std(ytrain)+np.mean(ytrain)\n",
    "        \n",
    "        yoos_gibbs[yoos_gibbs>1]=1\n",
    "        yoos_gibbs[yoos_gibbs<-1]=-1\n",
    "\n",
    "        \n",
    "        yoosconf=np.std(yoos_gibbs,axis=0) ##Forecast Standard Errors\n",
    "        ybayesconf=pd.DataFrame(yoosconf, columns = ['ybayesconf'])\n",
    "\n",
    "        date_and_permno = gg3.iloc[0:, [0,1]]\n",
    "        yoosc=gg3.iloc[0:,2:3]\n",
    "        yoosfreqd=yoosfreq.to_frame(name=\"yoosfreq\")\n",
    "        yoosbayesd=yoosbayes.to_frame(name=\"yoosbayesd\")\n",
    "  \n",
    "\n",
    "\n",
    "        mergeout = np.concatenate((date_and_permno,yoosfreqd,yoosbayesd,ybayesconf,yoosc,gg3.iloc[0:, 3:4]),axis=1)\n",
    "        mergeout = pd.DataFrame(mergeout)\n",
    "        mergeout.to_csv('D:/bayesian jmp/lasso_output_rr/lassorollrw{}.csv'.format(M), index=False, header=False)\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "signals_and_confidence_levels()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## **More Recent Sample**\n",
    "\n",
    "In the most recent sample uploaded by GKX, covering the years 2017–2020:\n",
    "\n",
    "1. The **price delay variable** is not available.  \n",
    "2. The **order of the first few columns** differs from the earlier sample.  \n",
    "3. Some **macroeconomic variables are not expressed in the same (log) scale** as in the initial sample (1957–2016) analyzed in the paper.  \n",
    "\n",
    "So, adjust the earlier code accordingly to **account for these differences**.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def signals_and_confidence_levels_last_sample():\n",
    "    \n",
    "    for M in range(31,35):\n",
    "        np.random.seed(0)\n",
    "        gg1=pd.read_csv('twdata{}.csv'.format(M))\n",
    "        gg2=pd.read_csv('vdata{}.csv'.format(M))\n",
    "        gg3=pd.read_csv('tedata{}.csv'.format(M))\n",
    "        \n",
    "        gg3['dp']=np.log(gg3['dp'])\n",
    "        gg2['dp']=np.log(gg2['dp'])\n",
    "        gg1['dp']=np.log(gg1['dp'])\n",
    "        \n",
    "        \n",
    "        gg3['earntoprice']=np.log(gg3['earntoprice'])\n",
    "        gg2['earntoprice']=np.log(gg2['earntoprice'])\n",
    "        gg1['earntoprice']=np.log(gg1['earntoprice'])\n",
    "        \n",
    "        \n",
    "        gg1['retmkt']=gg1['retmkt']-gg1['retmkt'].mean()\n",
    "        gg1['mktsize']=gg1['retmkt']*gg1['mvel1']\n",
    "        gg1['mktbm']=gg1['retmkt']*gg1['bm']\n",
    "        gg1['mktmom']=gg1['retmkt']*gg1['mom12m']\n",
    "        \n",
    "        pp=(gg1.columns.values)\n",
    "        gg1[\"yyyymm\"]= gg1[\"yyyymm\"].astype(str) \n",
    "        gg2[\"yyyymm\"]= gg2[\"yyyymm\"].astype(str) \n",
    "        gg3[\"yyyymm\"]= gg3[\"yyyymm\"].astype(str) \n",
    "        for k in range(96,104):\n",
    "            gg1[pp[k]]=(gg1[pp[k]]-np.mean(gg1[pp[k]]))/np.std(gg1[pp[k]])\n",
    "            gg2[pp[k]]=(gg2[pp[k]]-np.mean(gg1[pp[k]]))/np.std(gg1[pp[k]])\n",
    "            gg3[pp[k]]=(gg3[pp[k]]-np.mean(gg1[pp[k]]))/np.std(gg1[pp[k]]) \n",
    "        \n",
    "        \n",
    "        gg1=gg1.drop(columns=['count'])\n",
    "        gg2=gg2.drop(columns=['count'])\n",
    "        gg3=gg3.drop(columns=['count'])\n",
    "        gg1.columns[(gg1 == 0).all()]\n",
    "\n",
    "     \n",
    "        \n",
    "        drop_col_names=identify_near_constant_predictors(gg1.iloc[:,3:96])\n",
    "        gg1=gg1.drop(columns=drop_col_names)\n",
    "        gg2=gg2.drop(columns=drop_col_names)\n",
    "        gg3=gg3.drop(columns=drop_col_names)\n",
    "        \n",
    "        \n",
    "        nunique = gg1.nunique()\n",
    "        #nunique\n",
    "        cols_to_drop = nunique[nunique == 1].index\n",
    "\n",
    "        gg1=gg1.drop(cols_to_drop, axis=1)\n",
    "        gg2=gg2.drop(cols_to_drop, axis=1)\n",
    "        gg3=gg3.drop(cols_to_drop, axis=1)\n",
    "        \n",
    "        pp=(gg2.columns.values)\n",
    "        num_cols=pp[3:].size+3\n",
    "\n",
    "        \n",
    "        xtrain=((gg1.iloc[:,3:num_cols]))\n",
    "        xtest=((gg2.iloc[:,3:num_cols]))\n",
    "        xoos=(gg3.iloc[:,3:num_cols])\n",
    "        xerror=gg1[['retmkt','mktsize','mktbm','mktmom']]\n",
    "        #xerror=(xerror-np.mean(xerror))\n",
    "        ytrain=(gg1.iloc[:,1])\n",
    "\n",
    "        ytest=gg2.iloc[:,1]\n",
    "        yoos=gg3.iloc[:,1]\n",
    "\n",
    "        ##Standardizing data\n",
    "        ytraind=(ytrain-np.mean(ytrain,axis=0))/np.std(ytrain)\n",
    "        ytestd=(ytest-np.mean(ytrain,axis=0))/np.std(ytrain)\n",
    "        xtraind=(xtrain-np.mean(xtrain,axis=0))/np.std(xtrain)\n",
    "        xtestd=(xtest-np.mean(xtrain,axis=0))/np.std(xtrain)\n",
    "        xoosd=(xoos-np.mean(xtrain,axis=0))/np.std(xtrain)\n",
    "    \n",
    "        ##Fitting a frequentist Lasso just for comparison sake.\n",
    "        lasso=linear_model.Lasso(alpha=0.0001, fit_intercept=False)\n",
    "        lasso.fit(xtraind, ytraind)\n",
    "        lasso_cv_coef = lasso.coef_\n",
    "        ytrainpred=lasso.predict(xtraind)*np.std(ytrain)+np.mean(ytrain)\n",
    "        ytestpred=lasso.predict(xtestd)*np.std(ytrain)+np.mean(ytrain)\n",
    "\n",
    "        yoospred=xoosd.dot(lasso.coef_)*np.std(ytrain)+np.mean(ytrain)\n",
    "        errortrain=ytrain-ytrainpred\n",
    "\n",
    "        reg = LinearRegression().fit(xerror, errortrain)\n",
    "        errorfit=reg.predict(xerror)\n",
    "        errortrain=ytrain-ytrainpred\n",
    "\n",
    "        xyerror=xerror.transpose().dot(errortrain)\n",
    "        errorvar=np.var(errortrain-errorfit)\n",
    "        Aerror= xerror.transpose().dot(xerror)\n",
    "        Aerrorinv=np.linalg.inv(Aerror)\n",
    "\n",
    "\n",
    "        errorbetain=0*Aerrorinv.dot(xyerror)  ##Initiating factor loading coeffiicents (a_0,a_1,a_2,a_3) in the paper       \n",
    "        \n",
    "        yoospred=xoosd.dot(lasso.coef_)*np.std(ytrain)+np.mean(ytrain)\n",
    "        beta_gibbs=Gibbs_sampler_lambda_wls(xtraind, ytraind, xerror, errorbetain,1,1, 100, 0.15,0.2)\n",
    "        beta_bayes=np.median(beta_gibbs,axis=0)\n",
    "        yoosfreq=xoosd.dot(lasso_cv_coef)*np.std(ytrain)+np.mean(ytrain) ##frequentist lasso-based risk premium forecasts\n",
    "        yoosbayes=xoosd.dot(beta_bayes)*np.std(ytrain)+np.mean(ytrain) ##Bayesian lasso-based risk premium forecasts\n",
    "        \n",
    "        yoosbayes[yoosbayes>1]=1\n",
    "        yoosbayes[yoosbayes<-1]=-1\n",
    "        \n",
    "        yoos_gibbs=np.zeros((50,yoos.shape[0]))\n",
    "\n",
    "        for i in range(50):\n",
    "\n",
    "            yoos_gibbs[i]=xoosd.dot(beta_gibbs[i])*np.std(ytrain)+np.mean(ytrain)\n",
    "        \n",
    "        yoos_gibbs[yoos_gibbs>1]=1\n",
    "        yoos_gibbs[yoos_gibbs<-1]=-1\n",
    "\n",
    "        \n",
    "        yoosconf=np.std(yoos_gibbs,axis=0)\n",
    "        ybayesconf=pd.DataFrame(yoosconf, columns = ['ybayesconf'])\n",
    "\n",
    "        date_and_permno = gg3.iloc[0:, [0,2]]\n",
    "        yoosc=gg3.iloc[0:,1:2]\n",
    "        yoosfreqd=yoosfreq.to_frame(name=\"yoosfreq\")\n",
    "        yoosbayesd=yoosbayes.to_frame(name=\"yoosbayesd\")\n",
    "  \n",
    "\n",
    "\n",
    "        mergeout = np.concatenate((date_and_permno,yoosfreqd,yoosbayesd,ybayesconf,yoosc,gg3.iloc[0:, 3:4]),axis=1)\n",
    "        mergeout = pd.DataFrame(mergeout)\n",
    "        mergeout.to_csv('D:/bayesian jmp/lasso_output_rr/lassorollrw{}.csv'.format(M), index=False, header=False)\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "signals_and_confidence_levels_last_sample()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
